import scrapy
from myDistributedSpider.items import MydistributedspiderItem
from scrapy_redis.spiders import RedisSpider


class ItcastSpider(RedisSpider):
    """Distributed spider that scrapes teacher profiles from itcast.cn.

    Start URLs are not hard-coded; they are read from the Redis key given
    by ``redis_key``.
    """

    name = 'itcast'  # Spider name
    allowed_domains = ['itcast.cn']  # Domains the spider is restricted to
    redis_key = 'itcast:start_urls'  # Redis key the start URLs are read from

    def parse(self, response):
        """Extract one item per teacher card found in ``response``.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Returns:
            A list of ``MydistributedspiderItem`` objects, one for each
            ``<div class="li_txt">`` node, with ``name``, ``level`` and
            ``resume`` filled from the first text node of the ``h3``,
            ``h4`` and ``p`` children respectively. A field whose node is
            missing is set to an empty string.
        """
        items = []  # Collected teacher items
        for each in response.xpath("//div[@class='li_txt']"):
            item = MydistributedspiderItem()
            # extract_first(default="") avoids the IndexError that indexing
            # [0] into an empty extract() result would raise when a card is
            # missing one of its child nodes.
            item["name"] = each.xpath("h3/text()").extract_first(default="")
            item["level"] = each.xpath("h4/text()").extract_first(default="")
            item["resume"] = each.xpath("p/text()").extract_first(default="")
            # Keep the populated item; without this the result stays empty.
            items.append(item)
        return items
